'''
Created on Sep 15, 2011

@author: oabalbin
'''

import os
import sys
import glob


def read_files_folder(folderpath,ext):
    '''
    Return the paths inside `folderpath` whose names end with `ext`.

    The lookup is a glob on '*' + ext joined onto `folderpath`, so the
    returned paths carry the `folderpath` prefix.  The result is a new list
    in whatever order glob reports the matches; it is empty when nothing
    matches.
    '''
    pattern = os.path.join(folderpath, '*' + ext)
    return list(glob.glob(pattern))

def _write_frequency_record(line, coord_out, rsid_out):
    '''Parse one space-separated record and append it to the matching table.'''
    f = line.strip('\n').split(' ')
    is_rsid = f[0].startswith('rs')

    if f[0] != f[1]:
        # The two identifier columns must agree; dump the offending record
        # and stop the whole run.
        # NOTE(review): exit status 0 is kept for backward compatibility even
        # though this is an error path -- confirm whether callers rely on it.
        sys.stdout.write('%s %s\n' % ('if1' if is_rsid else 'if2', f))
        sys.exit(0)

    freq = float(f[5]) / (int(f[4]) + int(f[5]))
    change = f[2] + '>' + f[3] + '|' + str(freq)

    if is_rsid:
        columns = [f[0], f[0] + '|' + change]
        target = rsid_out
    else:
        loc = f[0].split(':')
        position = int(loc[1])
        columns = [loc[0], str(position - 1), str(position + 1), 'nors|' + change]
        target = coord_out

    # Joining directly on tabs keeps a comma inside a field from becoming a
    # column break.  The trailing tab before the newline is what this
    # function has always written and is preserved on purpose.
    target.write('\t'.join(columns) + '\t\n')


def compute_frequency(files_list,ofile,ffile):
    '''
    Convert allele-count files into two tab-separated frequency tables.

    Every file in `files_list` is read as: one header line (skipped), then
    space-separated records of six fields, e.g.

        chr1:12198 chr1:12198 G C 357 357

    For each record the frequency is field6 / (field5 + field6) and the two
    identifier fields (1 and 2) must be equal.

    * Records whose identifier starts with 'rs' go to `ffile` as
          <id> TAB <id>|<field3>><field4>|<freq> TAB
    * All other records have their identifier split on ':' into a name and
      an integer position and go to `ofile` as
          <name> TAB <pos-1> TAB <pos+1> TAB nors|<field3>><field4>|<freq> TAB

    Both output files are opened for writing (truncated) and, like every
    input file, are closed when the function returns or fails.  Empty input
    files and blank lines are skipped.

    Raises SystemExit (status 0) after printing the record to stdout when
    the two identifier fields differ.  Records with too few fields,
    non-numeric counts or a zero total count raise the underlying
    IndexError / ValueError / ZeroDivisionError.
    '''
    with open(ofile, 'w') as coord_out:
        with open(ffile, 'w') as rsid_out:
            for path in files_list:
                with open(path) as handle:
                    # Skip the header; the default keeps an empty file from
                    # raising StopIteration.
                    next(handle, None)
                    for line in handle:
                        if not line.strip():
                            continue
                        _write_frequency_record(line, coord_out, rsid_out)



def get_hapmap_frequencies(folder, ext,ofile,ffile):
    '''
    Run the frequency conversion over the files in `folder` ending in `ext`.

    The matching paths are collected with read_files_folder() and handed to
    compute_frequency() together with the output paths `ofile` and `ffile`.
    '''
    compute_frequency(read_files_folder(folder, ext), ofile, ffile)
    


if __name__ == '__main__':

    # Script entry point: hard-coded input folder, input extension and the
    # two output paths that are passed on to get_hapmap_frequencies().
    ext='.ac'
    folder = '/data/databases/hapmap/frequencies/freq/EUR/'
    ofile = '/data/databases/hapmap/frequencies/freq/EUR/EUR_allele_freq_no_rsid.bed'
    ffile = '/data/databases/hapmap/frequencies/freq/EUR/EUR_allele_freq_rsid.bed'

    get_hapmap_frequencies(folder=folder, ext=ext, ofile=ofile, ffile=ffile)







        
        
    
    